
# set working directory
setwd("~/Research/Mycorrhizae/Fieldwork/Isotopes")

library(lme4)
library(nlme)
library(lmerTest)
library(olsrr)
library(MuMIn)
library(htmlTable)
library(ggplot2)
library(Rmisc)
library(SciViews)
library(gridExtra)
library(MASS)

# define predictive R^2 functions for later
PRESS <- function(linear.model) {
  #' calculate the predictive residuals
  pr <- residuals(linear.model)/(1-lm.influence(linear.model)$hat)
  #' calculate the PRESS
  PRESS <- sum(pr^2)
  
  return(PRESS)
}

pred_r_squared <- function(linear.model) {
  #' Use anova() to get the sum of squares for the linear model
  lm.anova <- anova(linear.model)
  #' Calculate the total sum of squares
  tss <- sum(lm.anova$'Sum Sq')
  # Calculate the predictive R^2
  pred.r.squared <- 1-PRESS(linear.model)/(tss)
  
  return(pred.r.squared)
}

#########################################
# CREATING GRAPHS OF RAW DATA AND MEANS
################################################

soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# remove bog sample
soil.data <- soil.data[ which(soil.data$Side != "Bog"),]

# make distance numeric
soil.data$Distance <- as.numeric(soil.data$Distance)

# put distance 5 into 6 bin for analysis
for (i in 1:length(soil.data$Distance)){
  if (soil.data$Distance[i]==5){
    soil.data$Distance[i] <- 6
  }
}

# group Grey soil under Mineral
for (i in 1:length(soil.data$Distance)){
  if (soil.data$Type[i]=="Grey"){
    soil.data$Type[i] <- "Mineral"
  }
}

# by lab
soil.data <- soil.data[ which(soil.data$Lab == "WSU"),]

# test
hansen <- soil.data[which(soil.data$Stream=="Hansen"),]
happy <- soil.data[which(soil.data$Stream=="Happy"),]
yako <- soil.data[which(soil.data$Stream=="Yako"),]

soil.data$Stream <- as.factor(soil.data$Stream)

# examine data
boxplot(ph ~ Stream, data=soil.data)

# run anova
res_aov <- aov(ph ~ Stream, data=soil.data)
summary(res_aov)

# run tukey test
library(multcomp)
post_test <- glht(res_aov, linfct = mcp(Stream = "Tukey"))
summary(post_test)

# hansen has significantly higher organic soil C:N than yako
# hansen has significantly higher organic soil GWC than yako or happy
# hansen has significantly lower ph than happy

# by stream 
soil.data <- soil.data[ which(soil.data$Stream=="Hansen"),]
soil.data <- soil.data[ which(soil.data$Stream=="Happy"),]
soil.data <- soil.data[ which(soil.data$Stream=="Yako"),]

# by hansen side
soil.data <- soil.data[ which(soil.data$Side=="SL"),]
soil.data <- soil.data[ which(soil.data$Side=="SR"),]

# mineral or organic
soil.data <- soil.data[ which(soil.data$Type=="Organic"),]
soil.data <- soil.data[ which(soil.data$Type=="Mineral"),]

# choose transect
soil.data <- soil.data[ which(soil.data$Transect=="S6"),]

# choose distance
soil.data <- soil.data[ which(soil.data$Distance < 7),]

# examine C:N by Hansen bank organic 
tgc <- summarySE(soil.data, measurevar="d15N", groupvars=c("Side", "Distance"))
tgc
p1 <- ggplot(tgc[which(tgc$Side=="SL"),], aes(x=Distance, y=d15N)) + geom_point(position = position_dodge(.9), 
       stat = "summary", fun = "mean") + 
  geom_errorbar(aes(ymin=d15N-se, ymax=d15N+se), width=.2,position=position_dodge(.9)) +
  theme_classic() + 
  geom_point(data=soil.data[which(soil.data$Side=="SL"),], aes(x=Distance, y=d15N), 
             alpha=0.2) + ylim(-0.9, 12.83) + scale_x_reverse(breaks=c(1,6,10,20, 40, 60, 100)) 

p2 <- ggplot(tgc[which(tgc$Side=="SR"),], aes(x=Distance, y=d15N)) + geom_point(position = position_dodge(.9), 
      stat = "summary", fun = "mean") + 
  geom_errorbar(aes(ymin=d15N-se, ymax=d15N+se), width=.2,position=position_dodge(.9)) +
  theme_classic() + 
  geom_point(data=soil.data[which(soil.data$Side=="SR"),], aes(x=Distance, y=d15N), 
             alpha=0.2) + ylim(-0.9, 12.83) + scale_x_continuous(breaks=c(1,6,10,20, 40, 60, 100)) 

grid.arrange(p1, p2, nrow=1)
ggsave(plot=p, width=5, height=3, dpi=300, filename = "CN_L.jpg")

# examine C:N by Hansen bank organic 
tgc <- summarySE(soil.data, measurevar="C.N", groupvars=c("Distance"))
tgc
p <- ggplot(tgc, aes(x=Distance, y=C.N)) + geom_point(position = position_dodge(.9), 
   stat = "summary", fun = "mean") + geom_line() +
  geom_errorbar(aes(ymin=C.N-se, ymax=C.N+se), width=.2,position=position_dodge(.9)) +
  theme_classic() + 
  geom_point(data=soil.data[which(soil.data$Side=="SR"),], aes(x=Distance, y=C.N), 
             alpha=0.2) + ylim(10.8, 33) #+ scale_x_reverse() 
p
ggsave(plot=p, width=5, height=3, dpi=300, filename = "C.N_orgsoil_20_R.jpg")

# mean and errors
p <- ggplot(tgc, aes(x=Side, y=dC13)) + 
  geom_errorbar(aes(ymin=dC13-se, ymax=dC13+se), width=.1) + geom_line() + 
  geom_point() + theme_bw() + scale_x_discrete(labels=c('Enhanced', 'Depleted')) + 
  ylab("Organic soil dC13") 
p

ggsave(plot=p, width=3, height=3, dpi=300, filename = "dC13_Hansen_orgsoil_byside.jpg")


# for specific distances use #scale_x_continuous(breaks = c(1,3,6,10,20,40,60,100))
# for d15N for BOTH UNM and WSU data, use ylim(-0.9, 12.83)
# for total N at Hansen, use ylim(0.006, 0.06)
# for soil pH at Hansen, use ylim(3.5,6)
# for percent C at Hansen, use ylim(1.9, 48.88)
# for percent N at Hansen, use ylim(0.15, 2.65)
# for C:N at Hansen, use ylim(10.8, 33)

#######################################################################
# CREATE MODEL PREDICTIONS WITH RAW DATA
# Soil C:N, d15N and dC13 by distance and bank at Hansen
#####################################################################
soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# by lab
#soil.data <- soil.data[ which(soil.data$Lab == "WSU"),]

# remove bog sample
soil.data <- soil.data[ which(soil.data$Side != "Bog"),]

# make distance numeric
soil.data$Distance <- as.numeric(soil.data$Distance)

# by stream 
soil.data <- soil.data[ which(soil.data$Stream=="Hansen"),]

# by organic or mineral soil
soil.data <- soil.data[ which(soil.data$Type=="Organic"),]

# select only 1-20 m
soil.data <- soil.data[ which(soil.data$Distance < 21),]

# choose transect
soil.data <- soil.data[ which(soil.data$Transect=="S6"),]

# is distance normally distributed

# center Distance variable
#soil.data$Distance <- scale(soil.data$Distance, scale=FALSE)

# create ln of Distance column
soil.data$lnDistance <- ln(soil.data$Distance)

# create quadratic lnDistance column
soil.data$lnDistance2 <- soil.data$lnDistance^2
soil.data$Distance2 <- (soil.data$Distance)^2

##############################################
# C:N #
#########################################
# select data
soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# by lab
soil.data <- soil.data[ which(soil.data$Lab == "WSU"),]

# remove bog sample
soil.data <- soil.data[ which(soil.data$Side != "Bog"),]

# make distance numeric
soil.data$Distance <- as.numeric(soil.data$Distance)

# by stream 
#soil.data <- soil.data[ which(soil.data$Stream=="Hansen"),]

# select only 1-20 m
#soil.data <- soil.data[ which(soil.data$Distance < 21),]

# by organic or mineral soil
soil.data <- soil.data[ which(soil.data$Type=="Organic"),]
# using just distance doesn't work

test1 <- scale(soil.data$GWC, scale=FALSE)
soil.data$GWC <- test1[,1]

test2 <- scale(soil.data$ph, scale=FALSE)
soil.data$ph <- test2[,1]

soil.data$decomposing.carcass <- as.factor(soil.data$decomposing.carcass)
soil.data$carcass.deposition <- as.factor(soil.data$carcass.deposition)
soil.data$carcass.load <- as.numeric(soil.data$carcass.load)

# examine effect of distance on d15N ratio
# change na. action
options(na.action = "na.fail")
all.parms<-lm(log(C.N) ~ 
                #Side + 
                ln(Distance) + 
                #Side:ln(Distance) +
                #Side:I(ln(Distance)^2) + 
                #Side:I(ln(Distance)^3) + 
                #Side:I(ln(Distance)^4) + 
                carcass.deposition + 
                carcass.load +
                GWC +
                ph +
                decomposing.carcass,
              data=soil.data)
results<-dredge(all.parms)
subset(results, delta <2)
subset(results, delta == 0)

# all data
dredge.model <- lmer(C.N ~ ln(Distance) + decomposing.carcass +
                       carcass.load + GWC + (1|Stream:Transect), data=soil.data)
summary(dredge.model)

dredge.model <- lm(d15N ~ ln(Distance) + GWC, data=soil.data)
summary(dredge.model)

dredge.model <- lmer(d15N ~ ln(Distance) + GWC + (1|Stream:Transect), data=soil.data)
summary(dredge.model)

# get conditional r^2 (which takes into account random effects, second number)
MuMIn::r.squaredGLMM(lme4::lmer(C.N ~ ln(Distance) + carcass.deposition + decomposing.carcass +
                                  carcass.load + GWC + (1|Stream:Transect), data=soil.data))

pred_r_squared(dredge.model)

# get percentage variance explained by each variable using anova
af <- anova(dredge.model)
afss <- af$"Sum Sq"
blah <- cbind(af,PctExp=afss/sum(afss)*100)
blah$PctExp

# calculate VIF
library(car)
car::vif(dredge.model)

# all parameters for a SINGLE SIDE
all.parms<-lm(C.N ~ ln(Distance) + 
                I(ln(Distance)^2) + 
                I(ln(Distance)^3) + 
                I(ln(Distance)^4) +
                total.N + GWC,
              data=soil.data)
results<-dredge(all.parms)
subset(results, delta <1)
subset(results, delta == 0)

# get percentage variance explained by each variable using anova
af <- anova(dredge.model)
afss <- af$"Sum Sq"
print(cbind(af,PctExp=afss/sum(afss)*100))

# calculate VIF
library(car)
car::vif(dredge.model)

# 1-20 organic soil
dredge.model <- lm(C.N ~ decomposing.carcass + GWC + ln(Distance) + Side + Side:ln(Distance), data=soil.data)
summary(dredge.model)
# only GWC and ln(distance) are significant

# 1-100 organic soil
dredge.model <- lm(C.N ~ decomposing.carcass + carcass.deposition + GWC + ln(Distance), data=soil.data)
summary(dredge.model)
# GWC and ln(distance) are significant

test <- lm(C.N ~ ln(Distance) + I(ln(Distance)^2) + I(ln(Distance)^3) + I(ln(Distance)^4), data=soil.data)
test <- lm(C.N ~ Distance + total.N + GWC, data=soil.data)

# for 1-100
dredge.model <- lm(C.N ~ Side + GWC + Side:I(ln(Distance)^2), data=soil.data)
summary(dredge.model)

# for 1-20
dredge.model <- lm(C.N ~ GWC + ln(Distance), data=soil.data)
summary(dredge.model)

# for 1-20 on LEFT SIDE
dredge.model <- lm(C.N ~ ln(Distance), data=soil.data)

# for 1-20 on RIGHT SIDE
dredge.model <- lm(C.N ~ GWC + I(ln(Distance)^3) + I(ln(Distance)^4) + total.N, data=soil.data)

step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

pred_r_squared(step.model)

mod1 <- lm(C.N ~ Side, data=soil.data)
mod2 <- lm(C.N ~ Side + GWC, data=soil.data)
mod3 <- lm(C.N ~ ln(Distance) + GWC, data=soil.data)
mod4 <- lm(C.N ~ ln(Distance), data=soil.data)
mod5 <- lm(C.N ~ ln(Distance) + Side, data=soil.data)
mod6 <- lm(C.N ~ ln(Distance) + Side + GWC, data=soil.data)
mod7 <- lm(C.N ~ GWC, data=soil.data)
#mod8a <- lm(C.N ~ Side + lnDistance + Side:lnDistance + Side:I(lnDistance^2) + GWC, data=soil.data)
#mod8b <- lm(C.N ~ Side + lnDistance + Side*lnDistance + Side*I(lnDistance^2) + GWC, data=soil.data)
mod8c <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + GWC, data=soil.data)
#mod8d <- lm(C.N ~ Side + ln(Distance) + Side*ln(Distance) + Side*I((ln(Distance))^2) + GWC, data=soil.data)
#mod8e <- lm(C.N ~ Side + ln(Distance) + Side*ln(Distance) + Side*lnDistance2 + GWC, data=soil.data)
#mod8f <- lm(C.N ~ Side + ln(Distance) + Side*ln(Distance) + lnDistance2 + Side*lnDistance2 + GWC, data=soil.data)
mod9 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance), data=soil.data)
mod10 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + GWC, data=soil.data)
mod11 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2), data=soil.data)
mod12 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + GWC, data=soil.data)
mod13 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=soil.data)
mod13a <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3) + GWC, data=soil.data)
mod13b <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3) + GWC, data=soil.data)
mod13c <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=soil.data)
mod14 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3), data=soil.data)
mod14a <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3), data=soil.data)
mod14b <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + GWC, data=soil.data)
# although model 15s have as low AIC as model 12, they do not include individual
# variables of side and distance which are important 
mod15a <- lm(C.N ~ Side:I(ln(Distance)^2) + GWC, data=soil.data)
mod15 <- lm(C.N ~ Side:I(ln(Distance)^3) + GWC, data=soil.data)
mod16 <- lm(C.N ~ Side:poly(ln(Distance), 3, raw=TRUE) + GWC, data=soil.data)
mod16a <- lm(C.N ~ Side:poly(ln(Distance), 4, raw=TRUE) + GWC, data=soil.data)
mod16b <- lm(C.N ~ Side:poly(ln(Distance), 5, raw=TRUE) + GWC, data=soil.data)
mod16c <- lm(C.N ~ Side:poly(ln(Distance), 6, raw=TRUE) + GWC, data=soil.data)
mod16d <- lm(C.N ~ Side:poly(ln(Distance), 7, raw=TRUE) + GWC, data=soil.data)
mod17 <- lm(C.N ~ Side:poly(ln(Distance), 3, raw=TRUE), data=soil.data)
mod18 <- lm(C.N ~ Side:poly(ln(Distance), 4, raw=TRUE), data=soil.data)
mod19 <- lm(C.N ~ Side:poly(ln(Distance), 5, raw=TRUE), data=soil.data)
mod20 <- lm(C.N ~ Side:poly(ln(Distance), 6, raw=TRUE), data=soil.data)
mod21 <- lm(C.N ~ Side:poly(ln(Distance), 3, raw=TRUE) + GWC, data=soil.data)
mod22 <- lm(C.N ~ Side:poly(ln(Distance), 4, raw=TRUE) + GWC, data=soil.data)
mod23 <- lm(C.N ~ Side:poly(ln(Distance), 5, raw=TRUE) + GWC, data=soil.data)
mod24 <- lm(C.N ~ Side:poly(ln(Distance), 6, raw=TRUE) + GWC, data=soil.data)
mod25 <- lm(C.N ~ Side:poly(ln(Distance), 3, raw=TRUE), data=soil.data)
mod26 <- lm(C.N ~ Side:poly(ln(Distance), 4, raw=TRUE), data=soil.data)
mod27 <- lm(C.N ~ Side:poly(ln(Distance), 5, raw=TRUE), data=soil.data)
mod28 <- lm(C.N ~ Side:poly(ln(Distance), 6, raw=TRUE), data=soil.data)

# include random effect for top models for 1-100
mod3.lmer <- lmer(C.N ~ ln(Distance) + GWC + (1|Transect), data=soil.data, REML=FALSE)
mod6.lmer <- lmer(C.N ~ ln(Distance) + Side + GWC + (1|Transect), data=soil.data, REML=FALSE)
mod21.lmer <- lmer(C.N ~ Side:poly(ln(Distance), 3, raw=TRUE) + GWC + (1|Transect), data=soil.data, REML=FALSE)

# random effects for 1-20 m
mod14b.lmer <- lmer(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + total.N + GWC + (1|Transect), data=soil.data, REML=FALSE)
mod3.lmer <- lmer(C.N ~ ln(Distance) + GWC + (1|Transect), data=soil.data, REML=FALSE)
mod12.lmer <- lmer(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + total.N + GWC + (1|Transect), data=soil.data, REML=FALSE)

AIC(mod8a, mod8b, mod8c, mod8d, mod8e, mod8f)
results <- AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8c, mod9, mod10, mod11, mod12,
    mod13, mod13a, mod13b, mod13c, mod14, mod14a, mod14b, mod16, 
    mod16a, mod16b, mod16c, mod16d, mod17, mod18, mod19, mod20, mod21, mod22, 
    mod23, mod24, mod25, mod26, mod27, mod28, step.model, dredge.model)
#results <- AIC(mod3, mod3a, mod4, mod7, dredge.model, step.model)
ordered <- results[order(-results$AIC),]
ordered
summary(dredge.model)

# for organic C:N 1-100, CHOSE DREDGE MODEL 
# manual models: best models 3, 6, but other top models too including 
# 8c, 12 and 21. Conflicted over using 21 (hump) or 3 (no hump), going with mod3
# random effect lowers AIC but removes hump from mod21

# for organic C:N 1-20, CHOSE STEP

# select the model that will be used to plot predictions
select.mod <- step.model

pred_interval <- seq(1,20,0.2)
which.side <- "SL"
example <- data.frame(Distance=pred_interval, Side=which.side, 
           total.N = mean(soil.data$total.N[which(soil.data$Side==which.side)]),
           GWC = mean(soil.data$GWC[which(soil.data$Side==which.side)]),
           decomposing.carcass="yes")
soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
soil.pred1 <- as.data.frame(soil.pred1)
soil.pred1$Distance <- pred_interval

p1 <- ggplot(data=soil.pred1, aes(x=Distance, y=fit))+ geom_line(color="sienna") +
  geom_ribbon(data=soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = soil.data[which(soil.data$Side==which.side),], aes(x = Distance, 
  y = C.N, group=Distance, color = NULL), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
  fill = NA, linewidth = 1.2)) + labs(y = "C.N", x="Distance") + theme_classic()+
  ylim(10,34) +
  scale_x_reverse(breaks=c(1,3,6,10,20, 40, 60, 100)) 

pred_interval <- seq(1,20,0.2)
which.side <- "SR"
example <- data.frame(Distance=pred_interval, Side=which.side, 
                      total.N = mean(soil.data$total.N[which(soil.data$Side==which.side)]),
                      GWC = mean(soil.data$GWC[which(soil.data$Side==which.side)]),
                      decomposing.carcass="yes")
soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
soil.pred1 <- as.data.frame(soil.pred1)
soil.pred1$Distance <- pred_interval

p2 <- ggplot(data=soil.pred1, aes(x=Distance, y=fit))+ geom_line(color="sienna") +
  geom_ribbon(data=soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = soil.data[which(soil.data$Side==which.side),], aes(x = Distance, 
   y = C.N, group=Distance, color = NULL), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
  fill = NA, linewidth = 1.2)) + labs(y = "C.N", x="Distance") + theme_classic()+
  ylim(10,34) +
  scale_x_continuous(breaks=c(1,3,6,10,20, 40, 60, 100)) 

grid.arrange(p1, p2, nrow=1)

ggsave(plot=p1, width=5, height=3, dpi=300, filename = "CN_20_orgsoil_L.jpg")
ggsave(plot=p2, width=5, height=3, dpi=300, filename = "CN_20_orgsoil_R.jpg")

# for organic soil C:N 1-100, use limits ylim(10,34)
# for organic soil C:N 1-20, use limits ylim(11, 27.5)

# model diagnostics
# see if residuals are normally distributed
plot(density(mod8$residuals))
qqnorm(mod8$residuals)
qqline(mod8$residuals)
shapiro.test(mod8$residuals)
# plot residuals against predicted values to determine heteroscedacitty
plot(fitted(mod8),mod8$residuals)
abline(0,0)

#####################################################################
# d15N - here, include total N but do not include GWC
#####################################################################
# select data
soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# by lab
soil.data <- soil.data[ which(soil.data$Lab == "WSU"),]

# remove bog sample
soil.data <- soil.data[ which(soil.data$Side != "Bog"),]

# make distance numeric
soil.data$Distance <- as.numeric(soil.data$Distance)

# by stream 
soil.data <- soil.data[ which(soil.data$Stream=="Hansen"),]

# select only 1-20 m
soil.data <- soil.data[ which(soil.data$Distance < 21),]

# by organic or mineral soil
soil.data <- soil.data[ which(soil.data$Type=="Organic"),]

# scale variables
test <- scale(soil.data$PercentN, scale=FALSE)
soil.data$PercentN <- test[,1]

test1 <- scale(soil.data$GWC, scale=FALSE)
soil.data$GWC <- test1[,1]

soil.data$decomposing.carcass <- as.factor(soil.data$decomposing.carcass)
soil.data$carcass.deposition <- as.factor(soil.data$carcass.deposition)
soil.data$carcass.load <- as.numeric(soil.data$carcass.load)

# for 1-20, use side and distance up to forth power and %N and GWC

# examine effect of distance on d15N ratio
# change na. action
options(na.action = "na.fail")
all.parms<-lm(d15N ~ 
                #Side + 
                ln(Distance) + 
                #Side:ln(Distance) +
                #Side:I(ln(Distance)^2) + 
                #Side:I(ln(Distance)^3) + 
                #Side:I(ln(Distance)^4) + 
                PercentN +
                #C.N + 
                carcass.load + 
                decomposing.carcass + 
                #Transect + 
                carcass.deposition + 
                GWC,
              data=soil.data)
results<-dredge(all.parms)
subset(results, delta <2)
subset(results, delta == 0)

# all data
dredge.model <- lmer(d15N ~ ln(Distance) + PercentN + carcass.deposition + decomposing.carcass + GWC + (1|Stream:Transect),
                     data=soil.data)
summary(dredge.model)

dredge.model <- lmer(d15N ~ carcass.deposition + decomposing.carcass + carcass.load + GWC + ln(Distance) + (1|Stream:Transect) + PercentN, data=soil.data)
summary(dredge.model)

dredge.model <- lmer(d15N ~ carcass.deposition + decomposing.carcass + GWC + ln(Distance) + (1|Stream:Transect), data=soil.data)
summary(dredge.model)

# get conditional r^2 (which takes into account random effects, second number)
MuMIn::r.squaredGLMM(lme4::lmer(d15N ~ ln(Distance) + PercentN + carcass.load + carcass.deposition + decomposing.carcass + GWC + (1|Stream:Transect),
                                data=soil.data))

pred_r_squared(d)

# get percentage variance explained by each variable using anova
af <- anova(dredge.model)
afss <- af$"Sum Sq"
blah <- cbind(af,PctExp=afss/sum(afss)*100)
blah$PctExp

# calculate VIF
library(car)
car::vif(dredge.model)

# for organic soil 1-100, top model
dredge.model <- lm(d15N ~ carcass.deposition + GWC + PercentN + ln(Distance) + decomposing.carcass, data=soil.data)
summary(dredge.model)
# significant variables are GWC, carcass deposition, and Percent N and distance

# for organic soil 1-20, top model
dredge.model <- lm(d15N ~ carcass.deposition + GWC + ln(Distance), data=soil.data)
summary(dredge.model)
# significant variables are GWC, carcass deposition (almost), and Percent N and distance

dredge.model <- lm(d15N ~ GWC + Side + ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=soil.data)
dredge.model <- lm(d15N ~ decomposing.carcass + GWC + PercentN, data=soil.data)

# for 1-100
dredge.model <- lm(d15N ~ decomposing.carcass + GWC + Side + ln(Distance) + PercentN + Transect + Side:ln(Distance) + Side:I(ln(Distance)^2), data=soil.data)
summary(dredge.model)

# for 1-20
dredge.model <- lm(d15N ~ GWC + Side + Side:I(ln(Distance)^2), data=soil.data)
summary(dredge.model)

step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

pred_r_squared(step.model)

# ln distance
mod1 <- lm(d15N ~ Side, data=soil.data)
mod2 <- lm(d15N ~ Side + PercentN, data=soil.data)
mod3 <- lm(d15N ~ ln(Distance) + PercentN, data=soil.data)
mod4 <- lm(d15N ~ ln(Distance), data=soil.data)
mod5 <- lm(d15N ~ ln(Distance) + Side, data=soil.data)
mod6 <- lm(d15N ~ ln(Distance) + Side + PercentN, data=soil.data)
mod7 <- lm(d15N ~ PercentN, data=soil.data)
mod8 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + PercentN, data=soil.data)
mod9 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance), data=soil.data)
mod10 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + PercentN, data=soil.data)
mod11 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2), data=soil.data)
mod12 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + PercentN + GWC, data=soil.data)
mod13 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=soil.data)
mod14 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3), data=soil.data)
mod14a <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + PercentN, data=soil.data)
mod14b <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + PercentN + GWC, data=soil.data)
# although model 15s have as low AIC as model 12, they do not include individual
# variables of side and distance which are important 
mod15a <- lm(d15N ~ Side:I(ln(Distance)^2) + PercentN + GWC, data=soil.data)
mod15 <- lm(d15N ~ Side:I(ln(Distance)^3) + PercentN + GWC, data=soil.data)
mod16a <- lm(d15N ~ Side:poly(ln(Distance), 5, raw=TRUE) + PercentN + GWC, data=soil.data)
mod16b <- lm(d15N ~ Side:poly(ln(Distance), 4, raw=TRUE) + PercentN + GWC, data=soil.data)
mod16c <- lm(d15N ~ Side:poly(ln(Distance), 3, raw=TRUE) + PercentN + GWC, data=soil.data)
mod16d <- lm(d15N ~ Side:poly(ln(Distance), 6, raw=TRUE) + PercentN + GWC, data=soil.data)
mod17 <- lm(d15N ~ Side:poly(ln(Distance), 5, raw=TRUE), data=soil.data)
mod18 <- lm(d15N ~ Side:poly(ln(Distance), 4, raw=TRUE), data=soil.data)
mod19 <- lm(d15N ~ Side:poly(ln(Distance), 3, raw=TRUE), data=soil.data)
mod20 <- lm(d15N ~ Side:poly(ln(Distance), 6, raw=TRUE), data=soil.data)

#adding transect as random effect increased AIC and did not change model
# predictions
mod12.lmer <- lmer(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + PercentN + GWC + (1|Transect), data=soil.data, REML=FALSE)
mod16b.lmer <- lmer(d15N ~ Side:poly(ln(Distance), 4, raw=TRUE) + PercentN + GWC + (1|Transect), data=soil.data, REML=FALSE)
mod16c.lmer <- lmer(d15N ~ Side:poly(ln(Distance), 3, raw=TRUE) + PercentN + GWC + (1|Transect), data=soil.data, REML=FALSE)

# untransformed distance to compare yielded no hump for 1-20 m

# model comparison
results <- AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9, mod10, mod11, mod12,
    mod13, mod14, mod14a, mod14b, mod15, mod15a, mod16a, mod16b, 
    mod16c, mod16d, mod17, mod18, mod19, mod20, step.model, dredge.model)
ordered <- results[order(-results$AIC),]
ordered
summary(mod16a)

# ORGANIC 1-100
# using stepwise model is best!
# for organic 1-100, best model is mod12, 16b, 16c. 12 and 16c show slight 
# upward hump, 16b shows downward hump, and 16a (which is pretty close in AIC)
# shows downward trend followed up by hump (most interesting pattern but slightly
# outside of the <2 AIC. For now, going with 16c

# ORGANIC 1-20
# best models are dredge and step model which are the same!
# for organic 1-20, best model are 16b, 16c, and 14b. Conflicting because
# 14b has hump while 16b and 16c do not. Stepwise models don't have hump 
# depending on whether you include third and fourth power terms. Dredge doesn't
# have hump

# select the model that will be used to plot predictions
select.mod <- dredge.model

# plot raw data with model estimates for LEFT bank
pred_interval <- seq(1,20,0.2)
which.side <- "SL"
example <- data.frame(Distance=pred_interval, Side=which.side, 
           PercentN = mean(soil.data$PercentN[which(soil.data$Side==which.side)]), 
           GWC=mean(soil.data$GWC[which(soil.data$Side==which.side)]),
           Transect="S4",
           decomposing.carcass="yes")
soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
soil.pred1 <- as.data.frame(soil.pred1)
soil.pred1$Distance <- pred_interval

p1 <- ggplot(data=soil.pred1, aes(x=Distance, y=fit))+ 
  geom_line(color = "sienna1") +
  geom_ribbon(data=soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = soil.data[which(soil.data$Side==which.side),], aes(x = Distance, 
  y = d15N, group=Distance), alpha = 0.1, color="sienna1")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = 
  "black", fill = NA, linewidth = 1.2)) + 
  labs(y = "d15N", x="Distance") + theme_classic()+theme(legend.position="none")+
  ylim(-0.2,13) + 
  scale_x_reverse(breaks=c(1,3,6,10,20, 40, 60, 100)) 

# plot raw data with model estimates for RIGHT bank
pred_interval <- seq(1,20,0.2)
which.side <- "SR"
example <- data.frame(Distance=pred_interval, Side=which.side, 
           PercentN = mean(soil.data$PercentN[which(soil.data$Side==which.side)]), 
           GWC=mean(soil.data$GWC[which(soil.data$Side==which.side)]),
           Transect="S4",
           decomposing.carcass="yes")
soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
soil.pred1 <- as.data.frame(soil.pred1)
soil.pred1$Distance <- pred_interval

p2 <- ggplot(data=soil.pred1, aes(x=Distance, y=fit))+ 
  geom_line(color = "sienna1") +
  geom_ribbon(data=soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = soil.data[which(soil.data$Side==which.side),], aes(x = Distance, 
  y = d15N, group=Distance), alpha = 0.1, color="sienna1")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = 
  "black", fill = NA, linewidth = 1.2)) + 
  labs(y = "d15N", x="Distance") + theme_classic()+theme(legend.position="none")+
  ylim(-0.2,13) + 
  scale_x_continuous(breaks=c(1,3,6,10,20, 40, 60, 100)) 

grid.arrange(p1, p2, nrow=1)

ggsave(plot=p1, width=5, height=3, dpi=300, filename = "d15N_20_orgsoil_L.jpg")
ggsave(plot=p2, width=5, height=3, dpi=300, filename = "d15N_20_orgsoil_R.jpg")

# for organic d15N 1-100 limits, use  ylim(-0.2,13)
# for organic d15N 1-20 limits, use ylim(2.5,13)
# for organic d15N 1-10 limits, use ylim(5,13)

# non parametric regression, but only accepts one variable
library(mblm)
mod1 = mblm(d15N ~ Distance, data=soil.data)
mod2 = mblm(d15N ~ GWC, data=soil.data)
AIC(mod1, mod2)
summary(mod1)

# calculate variance inflation factor for correlated variables total N and GWC
library(car)
library(rms)
library(DAAG)
rms::vif(mod12)
car::vif(mod12)
DAAG::vif(mod12)
# looks like it's okay to include both total N and GWC, only .4 correlation anyways

#####################################################################
# dC13 - here, include total N but do not include GWC
#####################################################################
# select data
soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# by lab
soil.data <- soil.data[ which(soil.data$Lab == "WSU"),]

# remove bog sample
soil.data <- soil.data[ which(soil.data$Side != "Bog"),]

# make distance numeric
soil.data$Distance <- as.numeric(soil.data$Distance)

# by stream 
soil.data <- soil.data[ which(soil.data$Stream=="Hansen"),]

# select only 1-20 m
soil.data <- soil.data[ which(soil.data$Distance < 21),]

# by organic or mineral soil
soil.data <- soil.data[ which(soil.data$Type=="Organic"),]

# scale variables
test <- scale(soil.data$PercentN, scale=FALSE)
soil.data$PercentN <- test[,1]

test1 <- scale(soil.data$GWC, scale=FALSE)
soil.data$GWC <- test1[,1]

soil.data$decomposing.carcass <- as.factor(soil.data$decomposing.carcass)
soil.data$carcass.deposition <- as.factor(soil.data$carcass.deposition)
soil.data$carcass.load <- as.numeric(soil.data$carcass.load)

# examine effect of distance on dC13 ratio
# change na. action
options(na.action = "na.fail")
all.parms<-lm(dC13 ~ 
                #Side + 
                ln(Distance) + 
                #Side:ln(Distance) +
                #Side:I(ln(Distance)^2) + 
                #Side:I(ln(Distance)^3) + 
                #Side:I(ln(Distance)^4) + 
                PercentN +
                decomposing.carcass + 
                carcass.load + 
                GWC + 
                carcass.deposition,
              data=soil.data)
results<-dredge(all.parms)
subset(results, delta <2)
subset(results, delta == 0)

# all data
dredge.model <- lmer(dC13 ~ ln(Distance) + PercentN + decomposing.carcass + GWC + 
                       carcass.deposition + carcass.load + (1|Stream:Transect), 
                     data=soil.data)
summary(dredge.model)

dredge.model <- lm(dC13 ~ PercentN + carcass.load, data=soil.data)
summary(dredge.model)

dredge.model <- lmer(dC13 ~ PercentN + (1|Stream:Transect), data=soil.data)
summary(dredge.model)

# get conditional r^2 (which takes into account random effects, second number)
MuMIn::r.squaredGLMM(lme4::lmer(dC13 ~ ln(Distance) + PercentN + decomposing.carcass + GWC + 
                                  carcass.deposition + carcass.load + (1|Stream), 
                                data=soil.data))

# get percentage variance explained by each variable using anova
af <- anova(dredge.model)
afss <- af$"Sum Sq"
blah <- cbind(af,PctExp=afss/sum(afss)*100)
blah$PctExp

# calculate VIF
library(car)
car::vif(dredge.model)

# for 1-20 organic soil (deposition variable)
dredge.model <- lm(dC13 ~ decomposing.carcass + PercentN, data=soil.data)
summary(dredge.model)

# for 1-100 organic soil (deposition variable)
dredge.model <- lm(dC13 ~ carcass.deposition + ln(Distance) + PercentN + Side, data=soil.data)
summary(dredge.model)

# for 1-100
dredge.model <- lm(dC13 ~ Side + Side:I(ln(Distance)^3) + PercentN, data=soil.data)
summary(dredge.model)

# for 1-20
dredge.model <- lm(dC13 ~ GWC + ln(Distance) + PercentN + Side + Side:I(ln(Distance)^3), data=soil.data)
summary(dredge.model)

step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

pred_r_squared(dredge.model)

# run models
mod1 <- lm(dC13 ~ Side, data=soil.data)
mod2 <- lm(dC13 ~ PercentN, data=soil.data)
mod3 <- lm(dC13 ~ Side + PercentN, data=soil.data)
mod4 <- lm(dC13 ~ ln(Distance), data=soil.data)
mod5 <- lm(dC13 ~ ln(Distance) + PercentN, data=soil.data)
mod6 <- lm(dC13 ~ ln(Distance) + GWC, data=soil.data)
mod7 <- lm(dC13 ~ ln(Distance) + PercentN + GWC, data=soil.data)
mod8 <- lm(dC13 ~ ln(Distance) + Side, data=soil.data)
mod9 <- lm(dC13 ~ ln(Distance) + Side + PercentN, data=soil.data)
mod10 <- lm(dC13 ~ ln(Distance) + Side + GWC, data=soil.data)
mod11 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance), data=soil.data)
mod12 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + PercentN, data=soil.data)
mod13 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + PercentN + GWC, data=soil.data)
mod13a <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + GWC, data=soil.data)
mod14 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + PercentN, data=soil.data)
mod15 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2), data=soil.data)
mod16 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + PercentN + GWC, data=soil.data)
mod17 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=soil.data)
mod18 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3), data=soil.data)
mod19 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + PercentN, data=soil.data)
mod20 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + PercentN + GWC, data=soil.data)
# although model 15s have as low AIC as model 12, they do not include individual
# variables of side and distance which are important 
mod21 <- lm(dC13 ~ Side:I(ln(Distance)^2) + PercentN + GWC, data=soil.data)
mod22 <- lm(dC13 ~ Side:I(ln(Distance)^3) + PercentN + GWC, data=soil.data)
mod23 <- lm(dC13 ~ Side:poly(ln(Distance), 5, raw=TRUE) + PercentN + GWC, data=soil.data)
mod24 <- lm(dC13 ~ Side:poly(ln(Distance), 4, raw=TRUE) + PercentN + GWC, data=soil.data)
mod25 <- lm(dC13 ~ Side:poly(ln(Distance), 3, raw=TRUE) + PercentN + GWC, data=soil.data)
mod26 <- lm(dC13 ~ Side:poly(ln(Distance), 6, raw=TRUE) + PercentN + GWC, data=soil.data)
mod27 <- lm(dC13 ~ Side:poly(ln(Distance), 5, raw=TRUE), data=soil.data)
mod28 <- lm(dC13 ~ Side:poly(ln(Distance), 4, raw=TRUE), data=soil.data)
mod29 <- lm(dC13 ~ Side:poly(ln(Distance), 3, raw=TRUE), data=soil.data)
mod30 <- lm(dC13 ~ Side:poly(ln(Distance), 6, raw=TRUE), data=soil.data)

results <- AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9, mod10, mod11, 
               mod12, mod13, mod13a, mod14, mod15, mod16, mod17, mod18, mod19, mod20, mod21, mod22,
               mod23, mod24, mod25, mod26, mod27, mod28, mod29, mod30, step.model,
               dredge.model)
#AIC(mod2, mod4, mod5, mod6, mod7)
ordered <- results[order(-results$AIC),]
ordered
summary(mod9)

# for organic soil dC13 1-100, dredge is the best model
# for organic soil dC13 1-20, dredge is best

select.mod <- dredge.model

# plot raw data with model estimates for LEFT bank
pred_interval <- seq(1,20,0.2)
which.side <- "SL"
example <- data.frame(Distance=pred_interval, Side=which.side, 
                      PercentN = mean(soil.data$PercentN[which(soil.data$Side==which.side)]), 
                      GWC=mean(soil.data$GWC[which(soil.data$Side==which.side)]),
                      decomposing.carcass="yes")
soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
soil.pred1 <- as.data.frame(soil.pred1)
soil.pred1$Distance <- pred_interval

p1 <- ggplot(data=soil.pred1, aes(x=Distance, y=fit))+ 
  geom_line(color = "sienna") +
  geom_ribbon(data=soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = soil.data[which(soil.data$Side==which.side),], aes(x = Distance, 
  y = dC13, group=Distance), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = 
  "black", fill = NA, linewidth = 1.2)) + 
  labs(y = "dC13", x="Distance") + theme_classic()+theme(legend.position="none")+
  ylim(-29.81, -25.9) + 
  scale_x_reverse(breaks=c(1,3,6,10,20, 40, 60, 100)) 

# plot raw data with model estimates for RIGHT bank
pred_interval <- seq(1,20,0.2)
which.side <- "SR"
example <- data.frame(Distance=pred_interval, Side=which.side, 
                      PercentN = mean(soil.data$PercentN[which(soil.data$Side==which.side)]), 
                      GWC=mean(soil.data$GWC[which(soil.data$Side==which.side)]),
                      decomposing.carcass="yes")
soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
soil.pred1 <- as.data.frame(soil.pred1)
soil.pred1$Distance <- pred_interval

p2 <- ggplot(data=soil.pred1, aes(x=Distance, y=fit))+ 
  geom_line(color = "sienna") +
  geom_ribbon(data=soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = soil.data[which(soil.data$Side==which.side),], aes(x = Distance, 
  y = dC13, group=Distance), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = 
  "black", fill = NA, linewidth = 1.2)) + 
  labs(y = "dC13", x="Distance") + theme_classic()+theme(legend.position="none")+
  ylim(-29.81, -25.9) + 
  scale_x_continuous(breaks=c(1,3,6,10,20, 40, 60, 100)) 

grid.arrange(p1, p2, nrow=1)

ggsave(plot=p1, width=5, height=3, dpi=300, filename = "dC13_20_orgsoil_L.jpg")
ggsave(plot=p2, width=5, height=3, dpi=300, filename = "dC13_20_orgsoil_R.jpg")

# for organic dC13 1-100 limits, use ylim(-29.81, -25.9)
# for organic dC13 1-20 limits, use ylim(-29.81, -25.9)
  
############################################
# GWC
############################################

mod1 <- lm(GWC ~ Side, data=soil.data)
mod2 <- lm(GWC ~ ln(Distance), data=soil.data)
mod3 <- lm(GWC ~ ln(Distance) + Side, data=soil.data)
mod4 <- lm(GWC ~ Side + ln(Distance) + Side:ln(Distance), data=soil.data)
mod5 <- lm(GWC ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2), data=soil.data)

AIC(mod1, mod2, mod3, mod4, mod5)
summary(mod5)

# plot raw data with model estimates
pred_interval <- seq(1,100,1)
example <- data.frame(Distance=pred_interval, Side="SR")
pred1 <- predict(mod3, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$Distance <- pred_interval

ggplot(data=pred1, aes(x=Distance, y=fit))+ geom_line() + 
  geom_ribbon(data=pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = soil.data[which(soil.data$Side=="SR"),], aes(x = Distance, 
  y = GWC, group=Distance, color = NULL), alpha = 0.1)+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
 fill = NA, linewidth = 1.2)) + labs(y = "GWC", x="Distance") +
  ylim(0.75, 8.8)# +
  scale_x_reverse()

# for mineral GWC 1-100, 1-20, and 1-10,use ylim(0.15, 3.75)
# for organic GWC 1-100, use ylim(0.75, 8.8)

##########################################
# total N #
#########################################

# total N
mod1 <- lm(total.N ~ Side, data=soil.data)
mod2 <- lm(total.N ~ Side + GWC, data=soil.data)
mod3 <- lm(total.N ~ ln(Distance) + GWC, data=soil.data)
mod4 <- lm(total.N ~ ln(Distance), data=soil.data)
mod5 <- lm(total.N ~ ln(Distance) + Side, data=soil.data)
mod6 <- lm(total.N ~ ln(Distance) + Side + GWC, data=soil.data)
mod7 <- lm(total.N ~ GWC, data=soil.data)
mod8 <- lm(total.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + GWC, data=soil.data)
mod9 <- lm(total.N ~ Side + ln(Distance) + Side:ln(Distance), data=soil.data)
mod10 <- lm(total.N ~ Side + ln(Distance) + Side:ln(Distance) + GWC, data=soil.data)
mod11 <- lm(total.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2), data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9, mod10, mod11)
# top models are 10 and 8
summary(mod11)
  
# model diagnostics
# see if residuals are normally distributed
plot(density(mod8$residuals))
qqnorm(mod8$residuals)
qqline(mod8$residuals)
shapiro.test(mod8$residuals)
# plot residuals against predicted values to determine heteroscedacitty
plot(fitted(mod8),mod8$residuals)
abline(0,0)
  
# plot raw data with model estimates
pred_interval <- seq(1,100,1)
example <- data.frame(Distance=pred_interval, Side="SR")
pred1 <- predict(mod11, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$Distance <- pred_interval

ggplot(data=pred1, aes(x=Distance, y=fit))+ geom_line() + 
  geom_ribbon(data=pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = soil.data[which(soil.data$Side=="SR"),], aes(x = Distance, 
  y = total.N, group=Distance, color = NULL), alpha = 0.1)+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
  fill = NA, linewidth = 1.2)) + labs(y = "total N", x="Distance")# +
  ylim(0.75, 8.8)# +
  scale_x_reverse()  

####################################################################
# Organic soil C:N, d15N and dC13 by distance at Happy
####################################################################
soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# remove bog
soil.data <- soil.data[ which(soil.data$Side != "Bog"),]

# make distance numeric
soil.data$Distance <- as.numeric(soil.data$Distance)

# by stream 
soil.data <- soil.data[ which(soil.data$Stream=="Happy"),]

# by type
soil.data <- soil.data[ which(soil.data$Type=="Organic"),]

# C:N
mod1 <- lm(C.N ~ ln(Distance) + GWC, data=soil.data)
mod2 <- lm(C.N ~ I(ln(Distance)^2) + GWC, data=soil.data)
mod3 <- lm(C.N ~ ln(Distance), data=soil.data)
mod4 <- lm(C.N ~ I(ln(Distance)^2), data=soil.data)
mod5 <- lm(C.N ~ I(ln(Distance)^3), data=soil.data)
mod6 <- lm(C.N ~ GWC, data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# top models are 2,3,4
summary(mod1)


# non parametric regression
library(mblm)
mod1 = mblm(C.N ~ Distance, data=soil.data)
mod2 = mblm(C.N ~ GWC, data=soil.data)
AIC(mod1, mod2)
summary(mod1)

# plot raw data with model estimates
pred_interval <- c(1,3,6,10,20,40,60,100)
example <- data.frame(Distance=pred_interval, GWC=mean(soil.data$GWC))
pred1 <- predict(mod1, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$Distance <- pred_interval

ggplot(data=pred1, aes(x=Distance, y=fit))+ geom_point() + geom_line() + 
  geom_errorbar(aes(x=Distance, ymin=lwr, ymax=upr)) + 
  geom_point(data = soil.data, aes(x = Distance, y = C.N, group=Distance, 
             color = NULL), alpha = 0.1)+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
  fill = NA, linewidth = 1.2)) + labs(y = "C.N", x="Distance")

# d15N
mod1 <- lm(d15N ~ ln(Distance) + GWC, data=soil.data)
mod2 <- lm(d15N ~ I(ln(Distance)^2) + GWC, data=soil.data)
mod3 <- lm(d15N ~ ln(Distance), data=soil.data)
mod4 <- lm(d15N ~ I(ln(Distance)^2), data=soil.data)
mod5 <- lm(d15N ~ I(ln(Distance)^3), data=soil.data)
mod6 <- lm(d15N ~ GWC, data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# all are great - NO EFFECT
summary(mod1)

# dC13
mod1 <- lm(dC13 ~ ln(Distance) + GWC, data=soil.data)
mod2 <- lm(dC13 ~ I(ln(Distance)^2) + GWC, data=soil.data)
mod3 <- lm(dC13 ~ ln(Distance), data=soil.data)
mod4 <- lm(dC13 ~ I(ln(Distance)^2), data=soil.data)
mod5 <- lm(dC13 ~ I(ln(Distance)^3), data=soil.data)
mod6 <- lm(dC13 ~ GWC, data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# mod 4 and 5 best
summary(mod4)

# plot raw data with model estimates
pred_interval <- c(1,3,6,10,20,40,60,100)
example <- data.frame(Distance=pred_interval, GWC=mean(soil.data$GWC))
pred1 <- predict(mod4, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$Distance <- pred_interval

ggplot(data=pred1, aes(x=Distance, y=fit))+ geom_point() + geom_line() + 
  geom_errorbar(aes(x=Distance, ymin=lwr, ymax=upr)) + 
  geom_point(data = soil.data, aes(x = Distance, y = dC13, group=Distance, 
   color = NULL), alpha = 0.1)+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
  fill = NA, linewidth = 1.2)) + labs(y = "dC13", x="Distance")

# total N
mod1 <- lm(total.N ~ ln(Distance) + GWC, data=soil.data)
mod2 <- lm(total.N ~ I(ln(Distance)^2) + GWC, data=soil.data)
mod3 <- lm(total.N ~ ln(Distance), data=soil.data)
mod4 <- lm(total.N ~ I(ln(Distance)^2), data=soil.data)
mod5 <- lm(total.N ~ I(ln(Distance)^3), data=soil.data)
mod6 <- lm(total.N ~ GWC, data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
summary(mod5)

####################################################################
# Organic soil C:N, d15N and dC13 by distance at Yako
####################################################################
soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# make distance numeric
soil.data$Distance <- as.numeric(soil.data$Distance)

# by stream 
soil.data <- soil.data[ which(soil.data$Stream=="Yako"),]

# by type
soil.data <- soil.data[ which(soil.data$Type=="Organic"),]

# C:N
mod1 <- lm(C.N ~ ln(Distance) + GWC, data=soil.data)
mod2 <- lm(C.N ~ I(ln(Distance)^2) + GWC, data=soil.data)
mod3 <- lm(C.N ~ ln(Distance), data=soil.data)
mod4 <- lm(C.N ~ I(ln(Distance)^2), data=soil.data)
mod5 <- lm(C.N ~ I(ln(Distance)^3), data=soil.data)
mod6 <- lm(C.N ~ GWC, data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# top models are 2,6
summary(mod6)

# d15N
mod1 <- lm(d15N ~ ln(Distance) + GWC, data=soil.data)
mod2 <- lm(d15N ~ I(ln(Distance)^2) + GWC, data=soil.data)
mod3 <- lm(d15N ~ ln(Distance), data=soil.data)
mod4 <- lm(d15N ~ I(ln(Distance)^2), data=soil.data)
mod5 <- lm(d15N ~ I(ln(Distance)^3), data=soil.data)
mod6 <- lm(d15N ~ GWC, data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# top models are 1,3
summary(mod2)

# dC13
mod1 <- lm(dC13 ~ ln(Distance) + GWC, data=soil.data)
mod2 <- lm(dC13 ~ I(ln(Distance)^2) + GWC, data=soil.data)
mod3 <- lm(dC13 ~ ln(Distance), data=soil.data)
mod4 <- lm(dC13 ~ I(ln(Distance)^2), data=soil.data)
mod5 <- lm(dC13 ~ I(ln(Distance)^3), data=soil.data)
mod6 <- lm(dC13 ~ GWC, data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# mod 1 and 2 best
summary(mod6)

# total N
mod1 <- lm(total.N ~ ln(Distance) + GWC, data=soil.data)
mod2 <- lm(total.N ~ I(ln(Distance)^2) + GWC, data=soil.data)
mod3 <- lm(total.N ~ ln(Distance), data=soil.data)
mod4 <- lm(total.N ~ I(ln(Distance)^2), data=soil.data)
mod5 <- lm(total.N ~ I(ln(Distance)^3), data=soil.data)
mod6 <- lm(total.N ~ GWC, data=soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# mod 1 and 2 best
summary(mod1)

